% Empirical exercise 1: box office


clear all
warning off
clc

% 0. parameter setup
load emp1.mat
E = [10 20 30 40];
B = 1000;
n = length(y);
M = size(IM,1);
TUN = 0:0.1:5; 

% 1. forecasting comparision
tic
for j = 1:length(E)    
    ne = E(j);
    parfor i = 1:B            
        % 2. training and evaluation set
        IN = randperm(n);
        INt = IN(1:n-ne);
        INe = IN(n-ne+1:end);
        Zt = X(INt,:);
        yt = y(INt,:);
        Ze = X(INe,:);
        ye = y(INe,:);  
        % 3. estimation        
        Xt = zeros(n-ne,M);         % obtain forecasts
        F = zeros(ne,M);
        for t = 1:M
            bt = Zt(:,IM(t,:))\yt;
            Xt(:,t) = Zt(:,IM(t,:))*bt;
            F(:,t) = Ze(:,IM(t,:))*bt;
        end        
        f01 = csr_fun(yt,Zt,Ze,10);         % CSR_10
        f02 = csr_fun(yt,Zt,Ze,15);         % CSR_15
        f03 = peLASSO_fun(yt,Xt,F,TUN);     % peLASSO
        Rt = pma(yt,Zt,IM);                 % PMA
        f1 = Ze*Rt.b;
        blasso = est_cv(yt,Xt,TUN,1);
        f2 = F*blasso;                      % LASSO
        bridge = est_cv(yt,Xt,TUN,2);        
        f3 = F*bridge;                      % RIDGE
        w = est_cv(yt,Xt,TUN,3);        
        f4 = F*w;                           % L2RELAX no shrinkage
        w = est_cv(yt,Xt,TUN,4);        
        f5 = F*w;                           % L2RELAX linear shrinkage
        w = est_cv(yt,Xt,TUN,5);        
        f6 = F*w;                           % L2RELAX nonlinear shrinkage
        % 4. compute risks
        tmp = [f1 f01 f02 f03 f2 f3 f4 f5 f6];
        U = repmat(ye,1,size(tmp,2)) - tmp;
        MSFEt(i,:) = (mean(U.^2));
        MAEt(i,:) = mean(abs(U));           
    end
    SAVE1{j} = MSFEt;
    SAVE2{j} = MAEt;    
end
toc

% 5. show results
for i = 1:4
    MSFE(i,:) = mean(SAVE1{i});
    MAE(i,:) = mean(SAVE2{i});
end
R = [MSFE;MAE];
R = R ./repmat(R(:,1),1,size(R,2));
VN = {'10','20','30','40'};
prt_tab(R,[VN VN],3)

% 6. show weight results
X = X(1:end-10,:);
y = y(1:end-10,:);
M = size(IM,1);
TUN = 1;
for t = 1:M
    bt = X(:,IM(t,:))\y;
    F(:,t) = X(:,IM(t,:))*bt;
end
w = est_cv(y,F,TUN,3);
IN = [0.2 0.05 0 -0.05];
figure (1)
plotline(IN,sort(w,'descend'))

%% nested functions
function f = csr_fun(y,Xt,Xe,TUN)
% This program estimates forecasts of the complete subset regression by
% Elliott et al. (2013). TUN is the number kept variables. 

    % 0. house cleaning
    warning off
    IN0 = [1 18 19 29];     % variables that should be kept in all models
    [~,K] = size(Xt);       % the first term is the intercept
    IN1 = zeros(1,K);
    IN1(IN0) = 1;
    IN2 = find(IN1==0);
    KEEP = 10000;           % randomly keep 10000 models
    MAT0 = nchoosek(1:(K-length(IN0)),TUN-length(IN0));   % raw matrix for sub-models
    IN = datasample(1:size(MAT0,1),KEEP,'Replace',false);    
    MAT1 = MAT0(IN,:);
    MAT = zeros(KEEP,TUN);
    for i = 1:KEEP
        MAT(i,:) = [IN0 IN2(MAT1(i,:))];
    end    
    
    % 1. obtain estimates for each subset model
    F = zeros(size(Xe,1),KEEP);
    for i = 1:KEEP
        INt = MAT(i,:);        
        F(:,i) = Xe(:,INt)*(Xt(:,INt)\y);
    end
    
    % 2. simple average    
    f = mean(F,2);
end


function f = peLASSO_fun(y,Xt,Xe,TUN)
% This program estimates forecasts of the peLASSO method by Diebold and
% Shin (2019). TUN is the vector of tuning parameters for both steps. 

    % 0. house clean
    
    % 1. first step, LASSO estimation
    [BM1,INFO1] = lasso(Xt,y,'Intercept',false,'CV',5);
    b1 = BM1(:,INFO1.IndexMinMSE);
    IN = (b1~=0);                   % obtain non-zero X index
    K = size(Xt,2);
    
    % 2. second step, Ridge estimation
    k = sum(IN);
    if k == 0
        w = ones(K,1)/K;            % if all weights are shrunk to zero, conduct simple averaging
    else
        Xt2 = Xt(:,IN);
        yt = y - mean(Xt2,2);       % reformulate the criterion function following the appendix A of DS (2019).
        w0 = ridge_opt(yt,Xt2,TUN);
        w = zeros(K,1);
        w(IN) = w0;
    end    
    
    % 3. obtain forecast
    f = Xe*w;
end

function [w,tun] = ridge_opt(y,X,TUN)
% find the optimal lambda for conventional ridge estimator
    cvMse = zeros(length(TUN),1);            
    for i = 1:length(TUN)
        regf=@(XTRAIN,ytrain,XTEST)(ridge_fun(XTRAIN,ytrain,XTEST,TUN(i)));
        cvMse(i,1) = crossval('mse',X,y,'predfun',regf,'kfold',5);
    end 
    [~,IN] = min(cvMse);
    tun = TUN(IN);
    w = (X'*X+tun*eye(size(X,2)))^(-1)*(X'*y);  
end

function yfit = ridge_fun(XTRAIN,ytrain,XTEST,tun)
% cross validation estimation function
    warning off
    N = size(XTRAIN,2);    
    w = (XTRAIN'*XTRAIN+tun*eye(N))^(-1)*(XTRAIN'*ytrain);    
    yfit = XTEST*w;    
end






